Pupil size light reflex to a light test is a potential test to determine a person is under the influence of THC and be able to be use as a field sobriety measures. The goals of the analysis to:
Pupil size light reflex to light does not habituated to THC use (smoking).
Time for smoking to post test maybe an important variable to model b/c:
There is a circadian pattern to pupil size (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7445830/).
Currently this field does not use FDA, so any FDA in topic area may be publishable
Data on pupil size during light reflex test. Pupil size was extracted at the image level based on image analysis techniques. Each test was performed simultaneously on right and left eye before and after THC use (smoking).
ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo_20220926.csv"))
ps$demo_gender <- factor(ps$demo_gender,
levels = c(1,2),
labels = c("Male", "Female"))
ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2
ps$user_cat <- factor(ps$user_cat,
levels = c(0,1,2),
labels = c("non-user",
"occasional",
"daily"))
ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1
ps$tp <- factor(ps$tp,
levels = c(0,1),
labels = c("pre2", "post"))
# str(ps)
## -- Not needed with Ben's revised file missing frame numbers recorded
# #impute frame number
# for(i in 2:(nrow(ps)-1)){
# if(is.na(ps$frame[i]) & is.na(ps$frame[i+1] - ps$frame[i-1])){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) == 2){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) > 2){
# if(abs(ps$percent_change[i] - ps$percent_change[i-1]) <
# abs(ps$percent_change[i+1] - ps$percent_change[i])){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(ps$frame[i] <- ps$frame[i+1]-1)
# }
# )
# )
# }
# total number of subjects
length(unique(ps$subject_id))
## [1] 101
# subject level data
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "thc")])
# # Demographics Table by User Category
# table1(~ demo_age + demo_weight + demo_height + demo_gender|user_cat,
# data = pt.df[pt.df$tp == "pre2", ])
# number of subjects by timepoint (pre/post)
table(pt.df$tp)
##
## pre2 post
## 99 95
There are more subjects in total than the by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.
Add scalar features for each participant-time point
scalar.feat <- read.csv(file.path(data_folder, ps_folder, "scalars_trim.csv"),
header = T, stringsAsFactors = F)
scalar.feat$subject_id <- substr(scalar.feat$timeid, 1, 7)
scalar.feat$tp <- substr(scalar.feat$timeid, 8, 11)
scalar.featR <- scalar.feat[scalar.feat$eye == "Right", ]
pt.df <- merge(pt.df, scalar.featR[, c("subject_id", "tp", "min_constriction",
"duration", "avg_end_percent", "slope",
"AUC", "eye")],
by = c("subject_id", "tp"))
Reshape participant demog data to preserve pre and post THC levels and scalar features
pt.df.w <- reshape(pt.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "eye"),
direction = "wide")
pt.df.w$thc.post[pt.df.w$user_cat == "non-user" & is.na(pt.df.w$thc.post)] <- 0
pt.df.w$thc_chg <- pt.df.w$thc.post - pt.df.w$thc.pre2
pt.df.w$BMI <- pt.df.w$demo_weight*703/(pt.df.w$demo_height)^2
pt.df.w$min_constriction_chg <- pt.df.w$min_constriction.post - pt.df.w$min_constriction.pre2
pt.df.w$AUC_chg <- pt.df.w$AUC.post - pt.df.w$AUC.pre2
pt.df.w$duration_chg <- pt.df.w$duration.post - pt.df.w$duration.pre2
pt.df.w$avg_end_percent_chg <- pt.df.w$avg_end_percent.post - pt.df.w$avg_end_percent.pre2
pt.df.w$slope_chg <- pt.df.w$slope.post - pt.df.w$slope.pre2
## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"),
sheet = "Raw")
testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date, origin = "1899-12-30")
testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$pre_safetyscan_time_hr, ":", testTimes$pre_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_start_time_hr, ":", testTimes$consump_start_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_end_time_hr, ":", testTimes$consump_end_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$post_safetyscan_time_hr, ":", testTimes$post_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$consump_end_time_convert,
units = "mins"))
testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]
pt.df <- merge(pt.df.w, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
non_user.id <- pt.df$subject_id[pt.df$user_cat == "non-user"]
# View(testTimes[testTimes$subject_id %in% non_user.id, ])
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~ demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest + min_constriction_chg + AUC_chg + duration_chg + slope_chg + avg_end_percent_chg|user_cat,
data = pt.df)
| non-user (N=32) |
occasional (N=36) |
daily (N=33) |
Overall (N=101) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.9 (4.90) | 31.6 (4.98) | 33.5 (5.69) | 32.6 (5.21) |
| Median [Min, Max] | 33.5 [25.7, 42.2] | 30.1 [25.1, 41.9] | 32.6 [25.4, 45.3] | 32.3 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 171 (48.5) | 173 (33.4) | 176 (33.1) | 173 (38.4) |
| Median [Min, Max] | 165 [85.0, 284] | 170 [105, 240] | 175 [113, 250] | 170 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.83) | 69.6 (3.60) | 68.1 (3.52) | 68.6 (4.04) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [59.0, 76.0] | 69.0 [59.0, 78.0] |
| BMI | ||||
| Mean (SD) | 25.5 (4.89) | 25.0 (4.02) | 26.7 (4.42) | 25.7 (4.46) |
| Median [Min, Max] | 24.5 [16.6, 34.2] | 24.5 [16.9, 32.6] | 25.8 [18.9, 34.9] | 25.1 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (40.6%) | 23 (63.9%) | 18 (54.5%) | 54 (53.5%) |
| Female | 19 (59.4%) | 13 (36.1%) | 15 (45.5%) | 47 (46.5%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.20 (9.71) | 29.9 (31.9) | 11.7 (21.9) |
| Median [Min, Max] | 0 [0, 0] | 5.73 [0, 46.6] | 17.8 [1.32, 124] | 3.93 [0, 124] |
| Missing | 2 (6.3%) | 7 (19.4%) | 8 (24.2%) | 17 (16.8%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 64.2 (6.52) | 61.2 (4.87) | 62.7 (5.95) |
| Median [Min, Max] | NA [NA, NA] | 64.0 [54.0, 84.0] | 60.0 [53.0, 74.0] | 62.0 [53.0, 84.0] |
| Missing | 32 (100%) | 0 (0%) | 0 (0%) | 32 (31.7%) |
| min_constriction_chg | ||||
| Mean (SD) | 6.22 (8.36) | 13.3 (10.5) | 11.4 (9.49) | 10.3 (9.87) |
| Median [Min, Max] | 7.35 [-14.6, 19.6] | 14.1 [-15.1, 34.4] | 9.92 [-1.72, 35.8] | 11.1 [-15.1, 35.8] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| AUC_chg | ||||
| Mean (SD) | 3.06 (9.51) | 6.63 (7.82) | 7.67 (7.78) | 5.71 (8.56) |
| Median [Min, Max] | 4.21 [-21.6, 21.2] | 5.36 [-13.5, 23.5] | 5.46 [-1.82, 36.0] | 5.36 [-21.6, 36.0] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| duration_chg | ||||
| Mean (SD) | -6.48 (58.0) | -16.4 (78.2) | -3.96 (42.9) | -9.29 (61.9) |
| Median [Min, Max] | -10.0 [-145, 152] | -8.00 [-219, 198] | -2.00 [-105, 108] | -7.00 [-219, 198] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| slope_chg | ||||
| Mean (SD) | -0.0239 (0.0348) | -0.0398 (0.0384) | -0.0323 (0.0523) | -0.0321 (0.0420) |
| Median [Min, Max] | -0.0224 [-0.0959, 0.0727] | -0.0321 [-0.136, 0.0321] | -0.0259 [-0.176, 0.0372] | -0.0290 [-0.176, 0.0727] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| avg_end_percent_chg | ||||
| Mean (SD) | 0.687 (9.11) | 0.836 (5.84) | 4.54 (9.08) | 1.89 (8.17) |
| Median [Min, Max] | 1.02 [-26.8, 20.7] | 0.627 [-14.6, 13.7] | 2.70 [-5.32, 42.3] | 1.30 [-26.8, 42.3] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
I’m looking for any sharp declines over 10 frames after the 150th frame and then visualizing those trajectories to determine if there might be misalignment of the light test start frame.
ps$lagPctChg <- c(rep(0, 10), diff(ps$percent_change, lag = 10))
ps$lagID <- dplyr::lag(ps$subject_id, 10)
ps$lagtime <- dplyr::lag(ps$time, 10)
ps$lagEye <- dplyr::lag(ps$eye, 10)
ps$lagPctChg <- ifelse(ps$subject_id == ps$lagID & ps$time == ps$lagtime & ps$eye == ps$lagEye,
ps$lagPctChg, 0)
summary(ps$lagPctChg[ps$frame > 150])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -46.4908 -0.1719 0.2073 0.2394 0.9691 38.6126
hist(ps$lagPctChg[ps$frame > 150], breaks = 1000)
ps.150 <- ps[ps$frame > 150 & ps$eye == "Right", ]
pot.misaligned <- unique(ps.150[ps.150$lagPctChg <= -5, c("subject_id", "time", "user_type", "eye")])
pot.misaligned <- pot.misaligned[!(is.na(pot.misaligned$subject_id)), ]
pot.misaligned$misaligned <- 1
misaligned <- merge(ps, pot.misaligned,
by= c("subject_id", "time", "user_type", "eye"),
all.y = T)
mis.right <- misaligned[misaligned$eye == "Right", ]
misAlign.id <- unique(misaligned$subject_id)
for(i in misAlign.id){
print(ggplot(mis.right[mis.right$subject_id == i, ],
aes(x=frame, y = percent_change,
group = subject_id, color = i))+
geom_line()+
facet_grid(rows = vars(subject_id), cols = vars(tp))+
labs(title = paste0("Potential MisAligned Subject: ", i))+
theme_bw())
}
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_109.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-109", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_060.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-060", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
### New alignment view
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_007.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-007", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: start at 2nd bump?")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_033.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-033", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_038.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-038", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_043.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-043", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Both look odd, but mainly check pre; maybe review video")+
# theme_bw()
# dev.off()
Remove Outliers
## Remove known outliers
ps$outliers <- 0
# ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1
ps <- ps[ps$outliers == 0, ]
Plot of Pupil Size and Percent Change for “PRE” data by Eye and User Category
pre.df <- ps[ps$tp == "pre2", ]
ggplot(pre.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(pre.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot of PRE/POST for Right Eye
right.df <- ps[ps$eye == "Right", ]
ggplot(right.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title ="Pupil Size by Pre/Post and THC use category")+
theme_bw()
ggplot(right.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title = "Percent Change by Pre/Post and THC use category")+
theme_bw()
Plots of Left and Right Eye for “POST” data. One pt has negative values for pupil size.
post.df <- ps[ps$tp == "post", ]
ggplot(post.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(post.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot the Mean and +/- 1 SD functions for the percent change in pupil light reflex data for the pre and post data by THC user category. (Added code to input frame numbers when NA).
right_0.df <- right.df[right.df$frame >= 0, c("subject_id", "tp", "user_cat",
"frame", "percent_change")]
right_0.w <- reshape(right_0.df,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
rownames(right_0.w) <- paste0(right_0.w$subject_id, "_", right_0.w$tp)
pct_chg <- names(right_0.w[, 4:602])
mean_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
mean_fxn.l <- reshape(mean_fxn,
varying = pct_chg,
v.names = "percent_change",
timevar = "frame",
times = pct_chg,
direction = "long")
mean_fxn.l$frame <- as.numeric(gsub("percent_change.", "", mean_fxn.l$frame))
rownames(mean_fxn.l) <- NULL
mean_fxn.l$id <- NULL
names(mean_fxn.l)[names(mean_fxn.l) == "Group.1"] <- "tp"
names(mean_fxn.l)[names(mean_fxn.l) == "Group.2"] <- "user_cat"
mean_fxn.l$grp <- paste0(mean_fxn.l$tp, "_", mean_fxn.l$user_cat)
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 449 row(s) containing missing values (geom_path).
sd_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
NA’s are when there’s no data in for that time point and user category. Min frame value for NA is 480.
Truncating to frame 400
right_0.fpca <- fpca.face(Y = as.matrix(right_0.w[, 4:404]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0.fpca.pve <- round(right_0.fpca$evalues/sum(right_0.fpca$evalues)*100, 2)
right_0.fpca.pve
## [1] 81.85 8.95 5.08 2.57 1.56
mu <- right_0.fpca$mu
right_sd <- sqrt(right_0.fpca$evalues)
right_Phi <- right_0.fpca$efunctions
df_plot <- data.frame(frame = seq(0, 400, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
plot_pc1 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc2 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc3 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc4 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc5 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0.fpca.pve[5]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc1
PC1 plot: Individuals with low loading on PC1 (-2SD) have less constriction than the average and individuals with a high loading (+2SD) have more constriction. Rebound effect?
plot_pc2
PC2 plot: Overall shape of trajectory & pattern of recovery
Plot individuals with high/low PC2 overall scores.
right_scores <- right_0.fpca$scores
q.95 <- quantile(right_scores[, 2], p = 0.95)
highPC2 <- rownames(right_scores)[right_scores[, 2] > q.95]
highPC2.df <- data.frame(subject_id = substr(highPC2, 1, 7),
tp = substr(highPC2, 9,12))
for(i in 1:nrow(highPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == highPC2.df$subject_id[i] & right_0.df$tp == highPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for high PC2 score --", "Subject:", highPC2.df$subject_id[i], "Timepoint:", highPC2.df$tp[i]))+
theme_bw())
}
q.05 <- quantile(right_scores[, 2], p = 0.05)
lowPC2 <- rownames(right_scores)[right_scores[, 2] < q.05]
lowPC2.df <- data.frame(subject_id = substr(lowPC2, 1, 7),
tp = substr(lowPC2, 9,12))
for(i in 1:nrow(lowPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == lowPC2.df$subject_id[i] & right_0.df$tp == lowPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for low PC2 score --", "Subject:", lowPC2.df$subject_id[i], "Timepoint:", lowPC2.df$tp[i]))+
theme_bw())
}
plot_pc3
plot_pc4
plot_pc5
Make sure datasets have the same particpants and are truncated at frame 400
right_0.post <- right_0.df[right_0.df$tp == "post", ]
right_0.post <- right_0.post[right_0.post$frame <= 400, ]
post.ids <- unique(right_0.post$subject_id)
right_0.post.w <- reshape(right_0.post,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
post.vars <- names(right_0.post.w)[4:ncol(right_0.post.w)]
right_0.pt <- reshape(right_0.df[right_0.df$frame <= 400,],
timevar = "tp",
idvar = c("subject_id", "user_cat", "frame"),
direction = "wide")
right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2
right_0.pt <- right_0.pt[, c("subject_id", "user_cat", "frame", "wiPctChg")]
right_0.pt.w <- reshape(right_0.pt,
timevar = "frame",
idvar = c("subject_id", "user_cat"),
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 3:403]))
rowsAllMissing <- names(allMissing)[allMissing == 401]
right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]
rownames(right_0.pt.w) <- paste0(right_0.pt.w$subject_id, "_", right_0.pt.w$user_cat)
analytic.N <- post.ids[post.ids %in% right_0.pt.w$subject_id]
## Filter all datasets to analytic sample
right_0.df <- right_0.df[right_0.df$subject_id %in% analytic.N,]
right_0.post <- right_0.post[right_0.post$subject_id %in% analytic.N, ]
right_0.post.w <- right_0.post.w[right_0.post.w$subject_id %in% analytic.N ,]
row.names(right_0.post.w) <- right_0.post.w$subject_id
right_0.pt <- right_0.pt[right_0.pt$subject_id %in% analytic.N, ]
right_0.pt.w <- right_0.pt.w[right_0.pt.w$subject_id %in% analytic.N, ]
row.names(right_0.pt.w) <- right_0.pt.w$subject_id
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest + min_constriction_chg + AUC_chg + duration_chg + slope_chg + avg_end_percent_chg|user_cat,
data = pt.df[pt.df$subject_id %in% analytic.N, ])
| non-user (N=29) |
occasional (N=30) |
daily (N=25) |
Overall (N=84) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.3 (4.70) | 31.1 (4.75) | 32.8 (5.71) | 32.0 (5.02) |
| Median [Min, Max] | 31.1 [25.7, 42.2] | 29.7 [25.1, 41.9] | 32.0 [25.4, 45.3] | 31.1 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 167 (48.8) | 169 (34.0) | 180 (29.8) | 172 (38.7) |
| Median [Min, Max] | 162 [85.0, 284] | 165 [105, 240] | 175 [125, 250] | 169 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.97) | 69.5 (3.84) | 68.5 (3.40) | 68.7 (4.16) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [63.0, 76.0] | 69.0 [60.0, 78.0] |
| BMI | ||||
| Mean (SD) | 24.9 (4.72) | 24.5 (3.96) | 27.1 (4.26) | 25.4 (4.41) |
| Median [Min, Max] | 23.9 [16.6, 34.1] | 24.3 [16.9, 32.5] | 26.8 [19.0, 34.9] | 24.7 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (44.8%) | 20 (66.7%) | 16 (64.0%) | 49 (58.3%) |
| Female | 16 (55.2%) | 10 (33.3%) | 9 (36.0%) | 35 (41.7%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.20 (9.71) | 29.9 (31.9) | 11.9 (22.0) |
| Median [Min, Max] | 0 [0, 0] | 5.73 [0, 46.6] | 17.8 [1.32, 124] | 3.96 [0, 124] |
| Missing | 0 (0%) | 1 (3.3%) | 0 (0%) | 1 (1.2%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 63.9 (6.26) | 60.2 (3.78) | 62.2 (5.57) |
| Median [Min, Max] | NA [NA, NA] | 63.5 [54.0, 84.0] | 60.0 [53.0, 67.0] | 62.0 [53.0, 84.0] |
| Missing | 29 (100%) | 0 (0%) | 0 (0%) | 29 (34.5%) |
| min_constriction_chg | ||||
| Mean (SD) | 6.22 (8.36) | 13.3 (10.5) | 11.4 (9.49) | 10.3 (9.87) |
| Median [Min, Max] | 7.35 [-14.6, 19.6] | 14.1 [-15.1, 34.4] | 9.92 [-1.72, 35.8] | 11.1 [-15.1, 35.8] |
| AUC_chg | ||||
| Mean (SD) | 3.06 (9.51) | 6.63 (7.82) | 7.67 (7.78) | 5.71 (8.56) |
| Median [Min, Max] | 4.21 [-21.6, 21.2] | 5.36 [-13.5, 23.5] | 5.46 [-1.82, 36.0] | 5.36 [-21.6, 36.0] |
| duration_chg | ||||
| Mean (SD) | -6.48 (58.0) | -16.4 (78.2) | -3.96 (42.9) | -9.29 (61.9) |
| Median [Min, Max] | -10.0 [-145, 152] | -8.00 [-219, 198] | -2.00 [-105, 108] | -7.00 [-219, 198] |
| slope_chg | ||||
| Mean (SD) | -0.0239 (0.0348) | -0.0398 (0.0384) | -0.0323 (0.0523) | -0.0321 (0.0420) |
| Median [Min, Max] | -0.0224 [-0.0959, 0.0727] | -0.0321 [-0.136, 0.0321] | -0.0259 [-0.176, 0.0372] | -0.0290 [-0.176, 0.0727] |
| avg_end_percent_chg | ||||
| Mean (SD) | 0.687 (9.11) | 0.836 (5.84) | 4.54 (9.08) | 1.89 (8.17) |
| Median [Min, Max] | 1.02 [-26.8, 20.7] | 0.627 [-14.6, 13.7] | 2.70 [-5.32, 42.3] | 1.30 [-26.8, 42.3] |
ggplot(right_0.post, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat))+
labs(title ="POST data percent change by THC use category")+
theme_bw()
## Within Subject ### Mean function
Calculate the difference (post-pre) within subjects and plot by THC user category
ggplot(right_0.pt, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat))+
labs(title ="Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 1193 row(s) containing missing values (geom_path).
Non-user plot seems “centered” around 0 but still showing
heterogeneity. Lots of heterogeneity across user-groups, but a mostly
positive difference.
mean_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:403],
list(right_0.pt.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:403]
mean_fxn.pt.l <- reshape(mean_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
mean_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", mean_fxn.pt.l$frame))
rownames(mean_fxn.pt.l) <- NULL
mean_fxn.pt.l$id <- NULL
names(mean_fxn.pt.l)[names(mean_fxn.pt.l) == "Group.1"] <- "user_cat"
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
Difference in trajectories (post-pre), show a distinct difference between non-user and both user groups (up to frame 200), but the differences might not be significant. Harder to distinguish between the occasional and daily user trajectories.
# Check to make sure there are significant numbers in each user category
table(right_0.pt.w$user_cat)
##
## non-user occasional daily
## 29 30 25
sd_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:403],
list(right_0.pt.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:403]
sd_fxn.pt.l <- reshape(sd_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
sd_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", sd_fxn.pt.l$frame))
rownames(sd_fxn.pt.l) <- NULL
sd_fxn.pt.l$id <- NULL
names(sd_fxn.pt.l)[names(sd_fxn.pt.l) == "Group.1"] <- "user_cat"
sd_fxn.pt.l$wi_percent_change_neg <- -1*sd_fxn.pt.l$wi_percent_change
ggplot(sd_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
geom_line(aes(x=frame, y = wi_percent_change_neg, group = user_cat, color = user_cat),
linetype = "dashed")+
labs(title ="+/- 1 SD Within subject difference in percent change by THC use category")+
theme_bw()
## number of missing data points in each column
missingnessCol <- apply(right_0.pt.w[, 3:403],
MARGIN = 2,
FUN = function(x) sum(is.na(x)))
sum(missingnessCol == 0) ## 140 columns without any missing data
## [1] 143
sum(missingnessCol == 84) ## 109 columns with all missing data
## [1] 0
sum(missingnessCol <= 68) ## 438 columns where at least 80% of participants have data
## [1] 401
hist(missingnessCol, breaks = 30)
missing.df <- data.frame(frame = seq(0, 400, by =1),
missingness = round(missingnessCol/nrow(right_0.pt.w)*100, 2),
stringsAsFactors = F)
ggplot(missing.df, aes(x=frame, y= missingness))+
geom_line()
Truncate within person difference data to frame 400 where missingness
reaches 50%.
Try to visualize missing data in within person data frame
wi_pct_chg <- names(right_0.pt.w)[3:403]
right_0.pt.l <- reshape(right_0.pt.w,
varying = wi_pct_chg,
v.names = "wi_pct_chg_diff",
timevar = 'frame',
times = wi_pct_chg,
direction = "long")
right_0.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", right_0.pt.l$frame))
rownames(right_0.pt.l) <- NULL
right_0.pt.l$id <- NULL
right_0.pt.l$notMissing <- ifelse(is.na(right_0.pt.l$wi_pct_chg_diff), 0, 1)
ggplot(right_0.pt.l, aes(x = as.factor(frame), y = subject_id, fill = as.factor(notMissing)))+
geom_raster(alpha = 0.8)+
scale_fill_manual(values = c("black", 'white'),
labels = c("Missing", "Present"))+
theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust = 1))
right_0_wi.fpca <- fpca.face(Y = as.matrix(right_0.pt.w[, 3:403]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0_wi.fpca.pve <- round(right_0_wi.fpca$evalues/sum(right_0_wi.fpca$evalues)*100, 2)
mu <- right_0_wi.fpca$mu
right_sd <- sqrt(right_0_wi.fpca$evalues)
right_Phi <- right_0_wi.fpca$efunctions
wi.df_plot <- data.frame(frame = seq(0, 400, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5],
errband6 = 2*right_Phi[, 6]*right_sd[6])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
wi.plot_pc1 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0_wi.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc2 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0_wi.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc3 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0_wi.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc4 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0_wi.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc5 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0_wi.fpca.pve[5]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc6 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband6, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband6, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC6:", "% Var Explained:", right_0_wi.fpca.pve[6]))+
scale_color_manual(values = colors)+
theme_bw()
Create dataset of scores
wi.scores <- as.data.frame(right_0_wi.fpca$scores)
names(wi.scores) <- c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6")
wi.scores$subject_id <- rownames(wi.scores)
wi.plot_pc1
PC1: Individuals with high scores on PC1 have a weaker minimal constriction at post compared to pre. Individuals with low scores on PC1 have a stronger minimal constriction at post compared to pre.
# pc_ind_plots <- function(scores.df, raw.df, pc_plot.df, q, pc= "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change"){
# q.pc <- quantile(scores.df[, pc], probs = q)
# q.ids <- scores.df[scores.df[, pc] > q.pc, id]
#
# q.df <- pc_plot.df[pc_plot.df[, id] %in% q.ids, ]
#
# colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
#
# print(ggplot(data = pc_plot.df, aes(x=frame, y = mu))+
# geom_line(aes(color = "Pop.Mean"))+
# geom_line(aes(x = frame, y = mu+ pc_plot.df[, 2+pc.val], color = "+2SD"))+
# geom_line(aes(x = frame, y = mu- pc_plot.df[, 2+pc.val], color = "-2SD"))+
# labs(x = "frame",
# y = "Percent Change",
# color = "Legend",
# title = paste0("Individuals in the ", q*100,"th Percentile of PC1 scores"))+
# geom_line(data = q.df, aes(x=frame, y = pc_plot.var, group = id))+
# #scale_color_manual(values = colors)+
# theme_bw())
#
# for(i in q.ids){
# print(ggplot(data = raw.df[raw.df[, id] == i, ],
# aes(x = frame, y = raw.plot.var, group = tp, color = tp))+
# geom_line()+
# labs(title = paste0(i, " Pre/Post: ", q*100 , "th Percentile PC1 scores"))+
# theme_bw())
# }
# }
# pc_ind_plots(scores.df = wi.scores,
# raw.df = right_0.df,
# pc_plot.df = right_0.pt,
# q = 0.95, pc = "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change")
q.95 <- quantile(wi.scores$PC1, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC1 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC1 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 116 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC1 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC1, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC1 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC1 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 79 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC1 scores"))+
theme_bw())
}
wi.plot_pc2
PC2: Individuals with high loading on PC2 have a weaker initial constriction at post and a stronger rebound dilation past the 200th frame. Individuals with a low loading on PC2 have a stronger initial constriction at paost and a weaker rebound dilation past the 200th frame.
q.95 <- quantile(wi.scores$PC2, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC2 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC2 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 14 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC2 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC2, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC2 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC2 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 18 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC2 scores"))+
theme_bw())
}
wi.plot_pc3
q.95 <- quantile(wi.scores$PC3, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC3 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC3 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 23 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC3 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC3, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC3 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC3 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 21 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC3 scores"))+
theme_bw())
}
wi.plot_pc4
wi.plot_pc5
wi.plot_pc6
### Within person difference PC explanations
### INVESTIGATE BUMPS IN MEAN Function
## Pre/Post Across subjects
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
xlim(50, 200)+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 2688 row(s) containing missing values (geom_path).
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "pre2", 103:105]
## percent_change.100 percent_change.101 percent_change.102
## 001-003_pre2 -39.841378 NA -40.056700
## 001-004_pre2 -20.509286 -20.636732 -20.765188
## 001-007_pre2 -29.728903 -30.372171 -30.985599
## 001-009_pre2 -12.393281 -12.182258 -11.970736
## 001-015_pre2 -27.419396 -27.277259 -27.112306
## 001-029_pre2 -22.422026 -22.324030 -22.238468
## 001-030_pre2 -19.904482 -19.730264 -19.562660
## 001-031_pre2 -26.659914 -26.707617 -26.723738
## 001-032_pre2 -25.607501 -25.790114 -25.972873
## 001-037_pre2 -10.604402 -10.431064 -10.255967
## 001-038_pre2 -14.582329 -14.220651 -13.880713
## 001-039_pre2 -8.698685 -8.442025 -8.215622
## 001-040_pre2 -13.961272 -13.965766 -13.969542
## 001-041_pre2 -23.151100 -23.486850 -23.815640
## 001-042_pre2 -22.334270 -22.256730 -22.200960
## 001-043_pre2 -11.625390 -11.267720 -10.929380
## 001-045_pre2 -24.484760 -23.910470 -23.359550
## 001-046_pre2 -39.228640 -39.117800 -39.005780
## 001-047_pre2 -8.537555 -8.481596 -8.428778
## 001-049_pre2 -20.116580 -19.984260 -19.863700
## 001-050_pre2 -29.870190 -29.993170 -30.145070
## 001-053_pre2 -31.777050 -31.811150 -31.839820
## 001-054_pre2 -19.003080 -18.907630 -18.830010
## 001-055_pre2 -22.701870 -23.326340 -23.938730
## 001-062_pre2 -5.356276 -5.269542 -5.187438
## 001-076_pre2 -43.094070 -42.798500 -42.471880
## 001-077_pre2 -9.416607 -9.337826 -9.229779
## 001-095_pre2 -18.898660 -18.708520 -18.520320
## 001-097_pre2 -9.338060 -9.588896 -9.851689
## 001-099_pre2 -32.517180 -32.697860 -32.884540
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "post", 101:103]
## percent_change.98 percent_change.99 percent_change.100
## 001-003_post -27.753056 -27.913264 -28.065082
## 001-004_post -5.257832 -5.198141 -5.149285
## 001-005_post -21.013709 -20.832539 -20.645492
## 001-007_post -18.738419 -18.445158 -18.149873
## 001-009_post -15.268272 -15.431165 -15.637139
## 001-015_post -23.671938 -23.558792 -23.444028
## 001-029_post -15.334405 -15.258121 -15.180255
## 001-030_post -12.088330 -12.022690 -11.954150
## 001-031_post -32.672950 -32.429868 -32.167635
## 001-032_post -17.526118 -17.369644 -17.229585
## 001-037_post -7.295840 -7.301405 -7.306222
## 001-038_post -14.328061 -14.119133 -13.920714
## 001-039_post -6.300429 -6.357287 -6.401905
## 001-040_post -9.732384 -9.612723 -9.492207
## 001-041_post -11.783820 -11.723060 -11.672180
## 001-042_post -23.864700 -23.870620 -23.857460
## 001-043_post -9.529095 -9.569430 -9.604660
## 001-045_post -46.643350 -46.578230 -46.528420
## 001-046_post -24.943490 -24.785340 -24.634930
## 001-047_post -5.631184 -5.614140 -5.621079
## 001-049_post -12.144500 -12.199830 -12.238350
## 001-050_post -32.971580 -33.069770 -33.122360
## 001-053_post -17.935310 -17.814510 -17.687380
## 001-054_post -27.036700 -27.191480 -27.350670
## 001-055_post -12.115960 -11.958690 -11.809640
## 001-062_post -8.366234 -8.281283 -8.206952
## 001-076_post -51.349010 NA -51.523080
## 001-077_post -3.120474 -3.135075 -3.153623
## 001-097_post -13.988710 -13.710370 -13.429870
## 001-099_post -15.639890 -15.662830 -15.693060
## Within person plots
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
xlim(50, 200)+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 750 row(s) containing missing values (geom_path).
## Mean function Spike in Occassional user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "occasional", 113:119]
## wiPctChg.112 wiPctChg.113 wiPctChg.114 wiPctChg.115 wiPctChg.116 wiPctChg.117
## 2 10.68097 10.6863 9.873832 10.76751 10.69195 10.154
## wiPctChg.118
## 2 10.75894
right_0.pt.w[right_0.pt.w$user_cat == "occasional", 116:119]
## wiPctChg.114 wiPctChg.115 wiPctChg.116 wiPctChg.117
## 001-006 11.501074 11.5256630 11.5441500 11.5567771
## 001-011 13.644011 13.2101213 12.8103992 12.4484404
## 001-014 21.310733 21.4515436 21.6131807 21.7968208
## 001-021 -1.051763 -0.7900428 -0.5539591 -0.3467591
## 001-026 9.754613 9.6877039 9.6172871 9.5439328
## 001-033 -4.590481 -4.5847367 -4.5874266 -4.5993762
## 001-061 -3.113458 -3.2049960 -3.2953840 -3.3833670
## 001-066 -2.992660 -2.6954000 -2.4078300 -2.1356400
## 001-068 -2.464190 -2.7147200 NA -3.0311000
## 001-069 10.375112 10.3261620 10.2674190 10.2007090
## 001-070 -6.368030 -6.6819500 -6.9642800 -7.2116000
## 001-073 1.098830 0.8742100 0.6410700 0.3974700
## 001-074 NA 34.4445900 34.2126300 33.9470000
## 001-079 1.521615 1.5352410 1.5492310 1.5640600
## 001-098 13.731739 13.7472930 13.7733770 13.8107680
## 001-103 13.324760 13.1186220 12.9151740 12.7150600
## 001-104 12.785490 13.1440300 13.4777300 13.7848300
## 001-105 5.559630 5.0371800 4.5128900 3.9884500
## 001-106 31.497264 31.2436990 30.9937400 30.7511480
## 001-107 13.184770 13.2843100 13.3851400 13.4873000
## 001-108 0.617610 0.8447500 1.0849700 1.3358000
## 001-109 25.655630 26.0890100 26.5101800 NA
## 001-110 20.564430 20.4366200 20.3182100 20.2085800
## 001-111 24.636681 24.5955730 NA 24.5397440
## 001-112 5.439300 5.2952300 5.1482100 5.0007500
## 001-113 4.602660 4.8828900 5.1874800 5.5139600
## 001-114 8.313380 NA 9.0936100 9.6649700
## 001-116 9.561639 10.2337310 10.9367770 11.6657380
## 001-117 25.939192 25.8583240 25.7613320 25.6491330
## 001-118 22.301552 22.0631290 21.8292470 21.6023910
## Mean function Spike in Daily user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "daily", 133:149]
## wiPctChg.132 wiPctChg.133 wiPctChg.134 wiPctChg.135 wiPctChg.136 wiPctChg.137
## 3 9.276883 9.74442 9.978503 9.75998 9.534391 10.07775
## wiPctChg.138 wiPctChg.139 wiPctChg.140 wiPctChg.141 wiPctChg.142 wiPctChg.143
## 3 10.1046 10.12753 9.170939 10.31649 10.8234 11.62495
## wiPctChg.144 wiPctChg.145 wiPctChg.146 wiPctChg.147 wiPctChg.148
## 3 9.737888 10.18799 10.1544 10.17143 10.16064
right_0.pt.w[right_0.pt.w$user_cat == "daily", 143:146]
## wiPctChg.141 wiPctChg.142 wiPctChg.143 wiPctChg.144
## 001-002 NA 14.7153791 14.601849 14.495408
## 001-008 14.739642 14.7359309 14.729108 14.719145
## 001-010 11.478313 11.7425889 12.013432 12.288979
## 001-012 33.415378 33.3457661 33.274294 33.200908
## 001-013 NA 2.0212900 2.017464 2.002906
## 001-018 0.730210 0.8743158 1.019096 1.163107
## 001-022 19.108362 19.0395858 18.998221 18.982808
## 001-024 3.778025 3.7627633 NA 3.822039
## 001-027 -1.785007 -1.7402544 -1.684846 -1.620901
## 001-044 10.366936 10.2923040 10.213352 10.129905
## 001-056 3.610453 3.6402340 3.666943 3.690610
## 001-063 20.458040 20.7316600 20.963450 NA
## 001-064 2.713260 NA 3.009630 3.197960
## 001-067 11.248294 11.1776250 11.105867 11.032560
## 001-075 15.038380 15.2525100 15.432590 15.575750
## 001-081 19.957810 19.7584800 19.568360 19.387080
## 001-083 6.543770 6.7744200 NA 7.279460
## 001-085 2.503400 NA 2.673020 2.778390
## 001-086 15.468720 15.3217200 15.147030 14.945920
## 001-088 18.639526 18.5982260 18.530297 18.439524
## 001-090 12.830380 13.4777600 14.106620 14.712320
## 001-091 2.730030 2.3344100 1.952300 1.587670
## 001-093 10.249920 10.0865200 9.897910 9.687710
## 001-094 -11.587410 -11.7885300 NA -12.022410
## 001-096 15.042820 14.7835500 14.512860 14.232460
Jagged changes in the mean function lines seems to stem from missing data at those frames in the data set. The subjects per user-group is between 25 - 30, so missing data for one individual has a noticeable effect on mean function line.
post_right_0.fpca <- fpca.face(Y = as.matrix(right_0.post.w[, 4:404]), pve = 0.95, knots = 30, var = T)
post_right_0.fpca.pve <- round(post_right_0.fpca$evalues/sum(post_right_0.fpca$evalues)*100, 2)
post_right_0.fpca.pve
## [1] 83.29 10.30 4.46 1.95
post_right_scores <- as.data.frame(post_right_0.fpca$scores)
names(post_right_scores) <- c("PC1", "PC2", "PC3", "PC4")
post_right_scores$subject_id <- rownames(post_right_scores)
pt.scores <- merge(post_right_scores, pt.df,
by = c("subject_id"),
all.x = T)
pt.scores$smoker <- ifelse(pt.scores$user_cat %in% c("daily", "occasional"),1, 0)
m1 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.scores)
summary(m1)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9772 -1.1020 0.6899 0.8377 1.3950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.710339 0.248148 2.863 0.0042 **
## PC1 0.001366 0.001952 0.700 0.4840
## PC2 0.006807 0.005311 1.282 0.1999
## PC3 -0.016956 0.008116 -2.089 0.0367 *
## PC4 -0.023206 0.012655 -1.834 0.0667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 97.494 on 79 degrees of freedom
## AIC: 107.49
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred <- predict(m1, pt.scores)
pt.scores.roc <- roc(response = pt.scores$smoker, predictor = pt.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc
##
## Call:
## roc.default(response = pt.scores$smoker, predictor = pt.scores$pred)
##
## Data: pt.scores$pred in 29 controls (pt.scores$smoker 0) < 55 cases (pt.scores$smoker 1).
## Area under the curve: 0.6721
post.auc.pc4 <- pt.scores.roc$auc
plot.roc(pt.scores.roc, main = "Prediction of Smoker by PCs -- Post data")
m1.scalar <- glm(smoker ~ min_constriction.post + AUC.post + slope.post, family = "binomial", data = pt.scores)
summary(m1.scalar)
##
## Call:
## glm(formula = smoker ~ min_constriction.post + AUC.post + slope.post,
## family = "binomial", data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8029 -1.1135 0.6954 0.8839 1.4262
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8457 0.6365 2.899 0.00374 **
## min_constriction.post 0.1738 0.0753 2.308 0.02100 *
## AUC.post -0.1786 0.0812 -2.200 0.02783 *
## slope.post 8.9835 15.0561 0.597 0.55073
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.27 on 83 degrees of freedom
## Residual deviance: 100.55 on 80 degrees of freedom
## AIC: 108.55
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred <- predict(m1.scalar, pt.scores)
pt.scores.roc <- roc(response = pt.scores$smoker, predictor = pt.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc
##
## Call:
## roc.default(response = pt.scores$smoker, predictor = pt.scores$pred)
##
## Data: pt.scores$pred in 29 controls (pt.scores$smoker 0) < 55 cases (pt.scores$smoker 1).
## Area under the curve: 0.6752
post.auc.scalar <- pt.scores.roc$auc
plot.roc(pt.scores.roc, main = "Prediction of Smoker by Scalar Features -- Post data")
auc.df <- data.frame("Label" = c("Post AUC PCs", "Post AUC Scalar Feat"),
"AUC" = c(post.auc.pc4, post.auc.scalar),
stringsAsFactors = F)
pt.wi.scores <- merge(wi.scores, pt.df,
by = "subject_id",
all.x = T)
pt.wi.scores$smoker <- ifelse(pt.wi.scores$user_cat %in% c("daily", "occasional"),1, 0)
m2 <- glm(smoker ~ PC1 + PC2 + PC3 + PC4+ PC5+ PC6, family = "binomial", data = pt.wi.scores)
summary(m2)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0283 -0.9111 0.4960 0.8616 1.5337
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.833716 0.276238 3.018 0.00254 **
## PC1 0.004188 0.001927 2.173 0.02980 *
## PC2 -0.006338 0.003960 -1.601 0.10947
## PC3 0.012296 0.005532 2.223 0.02625 *
## PC4 -0.007200 0.007160 -1.006 0.31460
## PC5 -0.016480 0.008949 -1.842 0.06554 .
## PC6 0.006621 0.010557 0.627 0.53056
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 90.287 on 77 degrees of freedom
## AIC: 104.29
##
## Number of Fisher Scoring iterations: 5
pt.wi.scores$pred <- predict(m2, pt.wi.scores)
pt.wi.scores.roc <- roc(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred)
##
## Data: pt.wi.scores$pred in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.753
plot.roc(pt.wi.scores.roc, main = "Prediction of Smoker with PCs1-6: W/I Diff")
summary(m1)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9772 -1.1020 0.6899 0.8377 1.3950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.710339 0.248148 2.863 0.0042 **
## PC1 0.001366 0.001952 0.700 0.4840
## PC2 0.006807 0.005311 1.282 0.1999
## PC3 -0.016956 0.008116 -2.089 0.0367 *
## PC4 -0.023206 0.012655 -1.834 0.0667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 97.494 on 79 degrees of freedom
## AIC: 107.49
##
## Number of Fisher Scoring iterations: 4
pt.scores.roc
##
## Call:
## roc.default(response = pt.scores$smoker, predictor = pt.scores$pred)
##
## Data: pt.scores$pred in 29 controls (pt.scores$smoker 0) < 55 cases (pt.scores$smoker 1).
## Area under the curve: 0.6752
plot.roc(pt.scores.roc)
m5 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.wi.scores)
summary(m5)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8862 -1.0383 0.5971 0.9155 1.6653
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.770187 0.260146 2.961 0.00307 **
## PC1 0.003971 0.001901 2.089 0.03668 *
## PC2 -0.005893 0.003720 -1.584 0.11317
## PC3 0.012529 0.005579 2.246 0.02472 *
## PC4 -0.006329 0.006930 -0.913 0.36111
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 94.186 on 79 degrees of freedom
## AIC: 104.19
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_sameNPC <- predict(m5, pt.wi.scores)
pt.wi.scores.roc2 <- roc(response = pt.wi.scores$smoker,
predictor = pt.wi.scores$pred_sameNPC)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc2
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred_sameNPC)
##
## Data: pt.wi.scores$pred_sameNPC in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.7154
wi.auc.pcs <- pt.wi.scores.roc2$auc
auc.df <- rbind(auc.df, c("W/I Diff AUC PCS", wi.auc.pcs))
plot.roc(pt.wi.scores.roc2, main = "Prediction of Smoker with PCs1-4: W/I Diff")
m5.scalar <- glm(smoker ~ min_constriction_chg + AUC_chg + slope_chg, family = "binomial", data = pt.wi.scores)
summary(m5.scalar)
##
## Call:
## glm(formula = smoker ~ min_constriction_chg + AUC_chg + slope_chg,
## family = "binomial", data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8405 -1.2068 0.6883 0.9552 1.7509
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.04381 0.34861 -0.126 0.9000
## min_constriction_chg 0.09186 0.05508 1.668 0.0953 .
## AUC_chg -0.02167 0.05380 -0.403 0.6871
## slope_chg 2.04694 7.24384 0.283 0.7775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.27 on 83 degrees of freedom
## Residual deviance: 100.10 on 80 degrees of freedom
## AIC: 108.1
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_wi_scalar <- predict(m5.scalar, pt.wi.scores)
pt.wi.scores.roc2 <- roc(response = pt.wi.scores$smoker,
predictor = pt.wi.scores$pred_wi_scalar)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc2
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred_wi_scalar)
##
## Data: pt.wi.scores$pred_wi_scalar in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.6809
wi.auc.scalars <- pt.wi.scores.roc2$auc
auc.df <- rbind(auc.df, c("W/I Diff AUC Scalar Feat", wi.auc.scalars))
plot.roc(pt.wi.scores.roc2, main = "Prediction of Smoker with Scalar Change Features: W/I Diff")
auc.df$AUC <- round(as.numeric(auc.df$AUC), 3)
knitr::kable(auc.df)
| Label | AUC |
|---|---|
| Post AUC PCs | 0.672 |
| Post AUC Scalar Feat | 0.675 |
| W/I Diff AUC PCS | 0.715 |
| W/I Diff AUC Scalar Feat | 0.681 |
pt.scores$male <- ifelse(pt.scores$demo_gender == "Male", 1, 0)
pt.scores$non_user <- ifelse(pt.scores$user_cat == "non-user", 1, 0)
pt.scores$daily <- ifelse(pt.scores$user_cat == "daily", 1, 0)
pt.scores$occasional <- ifelse(pt.scores$user_cat == "occasional", 1, 0)
pt.wi.scores$male <- ifelse(pt.wi.scores$demo_gender == "Male", 1, 0)
pt.wi.scores$non_user <- ifelse(pt.wi.scores$user_cat == "non-user", 1, 0)
pt.wi.scores$daily <- ifelse(pt.wi.scores$user_cat == "daily", 1, 0)
pt.wi.scores$occasional <- ifelse(pt.wi.scores$user_cat == "occasional", 1, 0)
Compare PCs to Scalar Features
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"min_constriction.post", "duration.post",
"avg_end_percent.post", "slope.post", "AUC.post")])
Compare PCs to demog Variables
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
Compare Scalar Features to demog variables
chart.Correlation(pt.scores[, c("min_constriction.post", "duration.post",
"avg_end_percent.post", "slope.post", "AUC.post",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
post.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5983 -4.3192 -0.1694 3.1306 12.7292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.014011 0.540970 59.179 <2e-16 ***
## PC1 -0.004669 0.003971 -1.176 0.2433
## PC2 -0.020637 0.011258 -1.833 0.0706 .
## PC3 0.017514 0.017186 1.019 0.3113
## PC4 -0.015467 0.026054 -0.594 0.5544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.957 on 79 degrees of freedom
## Multiple R-squared: 0.07297, Adjusted R-squared: 0.02603
## F-statistic: 1.555 on 4 and 79 DF, p-value: 0.1947
post.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.632 -29.544 -4.641 24.484 103.067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.70001 4.22720 40.618 <2e-16 ***
## PC1 -0.01164 0.03103 -0.375 0.709
## PC2 -0.10301 0.08797 -1.171 0.245
## PC3 0.12138 0.13429 0.904 0.369
## PC4 -0.23601 0.20359 -1.159 0.250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.74 on 79 degrees of freedom
## Multiple R-squared: 0.0451, Adjusted R-squared: -0.003252
## F-statistic: 0.9327 on 4 and 79 DF, p-value: 0.4494
post.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8510 -2.9594 -0.1996 2.5599 9.4466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.692555 0.460221 149.260 <2e-16 ***
## PC1 -0.002644 0.003378 -0.783 0.436
## PC2 0.008402 0.009578 0.877 0.383
## PC3 0.003066 0.014620 0.210 0.834
## PC4 -0.009718 0.022165 -0.438 0.662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.217 on 79 degrees of freedom
## Multiple R-squared: 0.02006, Adjusted R-squared: -0.02955
## F-statistic: 0.4043 on 4 and 79 DF, p-value: 0.805
post.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5201 -3.1999 -0.4876 2.4899 8.9293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.4187800 0.4707331 53.998 <2e-16 ***
## PC1 -0.0003165 0.0034555 -0.092 0.9273
## PC2 -0.0208268 0.0097966 -2.126 0.0366 *
## PC3 0.0112197 0.0149543 0.750 0.4553
## PC4 -0.0365363 0.0226714 -1.612 0.1110
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.314 on 79 degrees of freedom
## Multiple R-squared: 0.0893, Adjusted R-squared: 0.04319
## F-statistic: 1.937 on 4 and 79 DF, p-value: 0.1125
post.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.798 -10.876 -8.555 0.073 110.819
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.92780 2.44895 4.871 5.71e-06 ***
## PC1 0.01368 0.01787 0.765 0.446
## PC2 0.01470 0.05066 0.290 0.772
## PC3 -0.06977 0.07757 -0.899 0.371
## PC4 -0.06213 0.11728 -0.530 0.598
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.31 on 78 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02175, Adjusted R-squared: -0.02842
## F-statistic: 0.4335 on 4 and 78 DF, p-value: 0.784
post.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1163 -3.2317 -0.3819 2.6763 19.3403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.589799 0.787619 79.467 <2e-16 ***
## PC1 -0.007214 0.006664 -1.082 0.284
## PC2 -0.030761 0.018424 -1.670 0.101
## PC3 0.021000 0.031930 0.658 0.514
## PC4 0.020726 0.040208 0.515 0.608
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.54 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.08302, Adjusted R-squared: 0.009663
## F-statistic: 1.132 on 4 and 50 DF, p-value: 0.3523
post.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.scores)
summary(post.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1339 -1.1995 0.7349 1.0591 1.2890
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.378139 0.231692 1.632 0.103
## PC1 -0.002949 0.001841 -1.602 0.109
## PC2 0.005992 0.005679 1.055 0.291
## PC3 0.003588 0.007913 0.453 0.650
## PC4 -0.013829 0.011982 -1.154 0.248
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 114.10 on 83 degrees of freedom
## Residual deviance: 108.69 on 79 degrees of freedom
## AIC: 118.69
##
## Number of Fisher Scoring iterations: 4
Compare PCs to Scalar Features
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg")])
Compare PCs to demog Variables
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
Compare Scalar Features to demog variables
chart.Correlation(pt.wi.scores[, c("min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
wi.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0520 -3.4697 -0.7861 2.8045 13.5917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.999729 0.547774 58.418 <2e-16 ***
## PC1 0.001999 0.003692 0.541 0.590
## PC2 0.010199 0.007365 1.385 0.170
## PC3 -0.007592 0.010045 -0.756 0.452
## PC4 -0.015863 0.013467 -1.178 0.242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.019 on 79 degrees of freedom
## Multiple R-squared: 0.04997, Adjusted R-squared: 0.001872
## F-statistic: 1.039 on 4 and 79 DF, p-value: 0.3925
wi.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -89.609 -25.230 -3.557 19.166 98.926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.54184 4.10340 41.805 <2e-16 ***
## PC1 0.05591 0.02766 2.021 0.0466 *
## PC2 0.06831 0.05517 1.238 0.2194
## PC3 -0.08511 0.07525 -1.131 0.2615
## PC4 -0.14184 0.10088 -1.406 0.1636
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.59 on 79 degrees of freedom
## Multiple R-squared: 0.1007, Adjusted R-squared: 0.05512
## F-statistic: 2.21 on 4 and 79 DF, p-value: 0.07533
wi.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0474 -3.2166 0.1125 2.1821 8.9696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.674473 0.454544 151.084 <2e-16 ***
## PC1 0.002158 0.003064 0.704 0.483
## PC2 0.006266 0.006111 1.025 0.308
## PC3 -0.004543 0.008336 -0.545 0.587
## PC4 -0.015199 0.011175 -1.360 0.178
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.164 on 79 degrees of freedom
## Multiple R-squared: 0.04456, Adjusted R-squared: -0.003816
## F-statistic: 0.9211 on 4 and 79 DF, p-value: 0.456
wi.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8028 -2.7132 -0.9886 2.5389 8.7992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.404439 0.473344 53.670 <2e-16 ***
## PC1 0.006419 0.003191 2.012 0.0477 *
## PC2 0.005784 0.006364 0.909 0.3662
## PC3 -0.009338 0.008680 -1.076 0.2853
## PC4 -0.010572 0.011637 -0.908 0.3664
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.337 on 79 degrees of freedom
## Multiple R-squared: 0.07963, Adjusted R-squared: 0.03302
## F-statistic: 1.709 on 4 and 79 DF, p-value: 0.1564
wi.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.026 -10.936 -6.875 -0.898 109.527
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.810996 2.422971 4.875 5.62e-06 ***
## PC1 0.024117 0.016347 1.475 0.144
## PC2 -0.012641 0.032392 -0.390 0.697
## PC3 0.047229 0.044297 1.066 0.290
## PC4 0.006139 0.059244 0.104 0.918
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.06 on 78 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.04287, Adjusted R-squared: -0.006213
## F-statistic: 0.8734 on 4 and 78 DF, p-value: 0.4838
wi.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6334 -3.2724 -0.3049 2.3066 20.4188
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.778085 0.784657 78.733 <2e-16 ***
## PC1 0.003595 0.005609 0.641 0.525
## PC2 -0.012224 0.011610 -1.053 0.297
## PC3 0.024269 0.014724 1.648 0.106
## PC4 0.006302 0.017715 0.356 0.724
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.556 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.07751, Adjusted R-squared: 0.003714
## F-statistic: 1.05 on 4 and 50 DF, p-value: 0.3909
wi.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.wi.scores)
summary(wi.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6275 -1.1881 0.6683 1.0064 1.6717
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.849e-01 2.349e-01 1.638 0.1014
## PC1 2.539e-03 1.633e-03 1.555 0.1199
## PC2 6.971e-03 3.580e-03 1.947 0.0515 .
## PC3 -6.227e-03 4.785e-03 -1.301 0.1932
## PC4 5.269e-05 6.024e-03 0.009 0.9930
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 114.10 on 83 degrees of freedom
## Residual deviance: 106.32 on 79 degrees of freedom
## AIC: 116.32
##
## Number of Fisher Scoring iterations: 4
post_step.df <- pt.scores[, c("PC1", "PC2", "PC3", "PC4", "min_constriction.post",
"duration.post", "avg_end_percent.post",
"slope.post", "AUC.post", "smoker")]
m.post_step <- glm(smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = post_step.df)
post_step.res <- step(m.post_step, direction = "backward")
## Start: AIC=107.49
## smoker ~ PC1 + PC2 + PC3 + PC4
##
## Df Deviance AIC
## - PC1 1 97.988 105.99
## - PC2 1 99.323 107.32
## <none> 97.494 107.49
## - PC4 1 101.219 109.22
## - PC3 1 102.103 110.10
##
## Step: AIC=105.99
## smoker ~ PC2 + PC3 + PC4
##
## Df Deviance AIC
## - PC2 1 99.747 105.75
## <none> 97.988 105.99
## - PC4 1 101.593 107.59
## - PC3 1 102.460 108.46
##
## Step: AIC=105.75
## smoker ~ PC3 + PC4
##
## Df Deviance AIC
## <none> 99.747 105.75
## - PC4 1 103.636 107.64
## - PC3 1 104.497 108.50
summary(post_step.res)
##
## Call:
## glm(formula = smoker ~ PC3 + PC4, family = "binomial", data = post_step.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1115 -1.2096 0.7462 0.8860 1.3898
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.706343 0.244729 2.886 0.0039 **
## PC3 -0.016576 0.007901 -2.098 0.0359 *
## PC4 -0.023653 0.012717 -1.860 0.0629 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 99.747 on 81 degrees of freedom
## AIC: 105.75
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred_post_step <- predict(post_step.res, pt.scores)
pt.scores.roc3 <- roc(response = pt.wi.scores$smoker,
predictor = pt.scores$pred_post_step)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc3
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.scores$pred_post_step)
##
## Data: pt.scores$pred_post_step in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.674
auc.specific <- data.frame("Post AUC PCs Step", pt.scores.roc3$auc)
auc.df <- rbind(auc.df, setNames(auc.specific, names(auc.df)))
plot.roc(pt.scores.roc3, main = "Prediction of Smoker with PCs Backward Stepwise -- Post")
## Scalar
m.post_step.scalar <- glm(smoker ~ min_constriction.post + duration.post +
avg_end_percent.post + slope.post,
family = "binomial", data = post_step.df)
post_step.res <- step(m.post_step.scalar, direction = "backward")
## Start: AIC=109.58
## smoker ~ min_constriction.post + duration.post + avg_end_percent.post +
## slope.post
##
## Df Deviance AIC
## - slope.post 1 99.874 107.87
## - duration.post 1 101.475 109.47
## <none> 99.578 109.58
## - min_constriction.post 1 102.909 110.91
## - avg_end_percent.post 1 103.523 111.52
##
## Step: AIC=107.87
## smoker ~ min_constriction.post + duration.post + avg_end_percent.post
##
## Df Deviance AIC
## <none> 99.874 107.87
## - duration.post 1 102.195 108.19
## - avg_end_percent.post 1 103.578 109.58
## - min_constriction.post 1 105.689 111.69
summary(post_step.res)
##
## Call:
## glm(formula = smoker ~ min_constriction.post + duration.post +
## avg_end_percent.post, family = "binomial", data = post_step.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9573 -1.1407 0.7105 0.8993 1.3750
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.962895 2.340790 2.120 0.0340 *
## min_constriction.post 0.074465 0.032185 2.314 0.0207 *
## duration.post -0.007895 0.005251 -1.504 0.1327
## avg_end_percent.post -0.101959 0.055702 -1.830 0.0672 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 99.874 on 80 degrees of freedom
## AIC: 107.87
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred_post_step.scalar <- predict(post_step.res, pt.scores)
pt.scores.roc3 <- roc(response = pt.wi.scores$smoker,
predictor = pt.scores$pred_post_step.scalar)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc3
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.scores$pred_post_step.scalar)
##
## Data: pt.scores$pred_post_step.scalar in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.6708
auc.specific <- data.frame("Post AUC Scalar Step", pt.scores.roc3$auc)
auc.df <- rbind(auc.df, setNames(auc.specific, names(auc.df)))
plot.roc(pt.scores.roc3, main = "Prediction of Smoker with Scalar Features Backward Stepwise -- Post")
wi_step.df <- pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg", "smoker")]
m.wi_step <- glm(smoker ~ PC1 + PC2 + PC3 + PC4 +PC5 +PC6, family = "binomial", data = wi_step.df)
wi_step.res <- step(m.wi_step, direction = "backward")
## Start: AIC=104.29
## smoker ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6
##
## Df Deviance AIC
## - PC6 1 90.687 102.69
## - PC4 1 91.342 103.34
## <none> 90.287 104.29
## - PC2 1 92.960 104.96
## - PC5 1 94.017 106.02
## - PC1 1 95.533 107.53
## - PC3 1 96.253 108.25
##
## Step: AIC=102.69
## smoker ~ PC1 + PC2 + PC3 + PC4 + PC5
##
## Df Deviance AIC
## - PC4 1 91.661 101.66
## <none> 90.687 102.69
## - PC2 1 93.260 103.26
## - PC5 1 94.186 104.19
## - PC1 1 95.849 105.85
## - PC3 1 96.685 106.69
##
## Step: AIC=101.66
## smoker ~ PC1 + PC2 + PC3 + PC5
##
## Df Deviance AIC
## <none> 91.661 101.66
## - PC2 1 94.664 102.66
## - PC5 1 95.056 103.06
## - PC1 1 96.275 104.28
## - PC3 1 97.320 105.32
summary(wi_step.res)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC5, family = "binomial",
## data = wi_step.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9500 -0.9776 0.5350 0.8601 1.6504
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.792529 0.266430 2.975 0.00293 **
## PC1 0.003838 0.001862 2.062 0.03923 *
## PC2 -0.006503 0.003820 -1.702 0.08870 .
## PC3 0.011260 0.005170 2.178 0.02939 *
## PC5 -0.015342 0.008694 -1.765 0.07761 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.267 on 83 degrees of freedom
## Residual deviance: 91.661 on 79 degrees of freedom
## AIC: 101.66
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_wi_step <- predict(wi_step.res, pt.wi.scores)
pt.wi.scores.roc4 <- roc(response = pt.wi.scores$smoker,
predictor = pt.wi.scores$pred_wi_step)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc4
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred_wi_step)
##
## Data: pt.wi.scores$pred_wi_step in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.7517
auc.specific <- data.frame("W/I Diff AUC PCs Step", pt.wi.scores.roc4$auc)
auc.df <- rbind(auc.df, setNames(auc.specific, names(auc.df)))
plot.roc(pt.wi.scores.roc4, main = "Prediction of Smoker with PCs Backward Stepwise -- W/I Diff")
### Scalar Features
m.wi_step.scalar <- glm(smoker ~ min_constriction_chg + AUC_chg + duration_chg
+ avg_end_percent_chg + slope_chg,
family = "binomial", data = wi_step.df)
wi_step.res <- step(m.wi_step.scalar, direction = "backward")
## Start: AIC=111.94
## smoker ~ min_constriction_chg + AUC_chg + duration_chg + avg_end_percent_chg +
## slope_chg
##
## Df Deviance AIC
## - AUC_chg 1 99.951 109.95
## - duration_chg 1 99.980 109.98
## - avg_end_percent_chg 1 100.058 110.06
## - slope_chg 1 100.064 110.06
## <none> 99.942 111.94
## - min_constriction_chg 1 102.616 112.62
##
## Step: AIC=109.95
## smoker ~ min_constriction_chg + duration_chg + avg_end_percent_chg +
## slope_chg
##
## Df Deviance AIC
## - duration_chg 1 99.985 107.98
## - slope_chg 1 100.066 108.07
## - avg_end_percent_chg 1 100.242 108.24
## <none> 99.951 109.95
## - min_constriction_chg 1 105.346 113.35
##
## Step: AIC=107.98
## smoker ~ min_constriction_chg + avg_end_percent_chg + slope_chg
##
## Df Deviance AIC
## - slope_chg 1 100.094 106.09
## - avg_end_percent_chg 1 100.266 106.27
## <none> 99.985 107.98
## - min_constriction_chg 1 105.370 111.37
##
## Step: AIC=106.09
## smoker ~ min_constriction_chg + avg_end_percent_chg
##
## Df Deviance AIC
## - avg_end_percent_chg 1 100.29 104.29
## <none> 100.09 106.09
## - min_constriction_chg 1 107.26 111.26
##
## Step: AIC=104.29
## smoker ~ min_constriction_chg
##
## Df Deviance AIC
## <none> 100.29 104.29
## - min_constriction_chg 1 108.27 110.27
summary(wi_step.res)
##
## Call:
## glm(formula = smoker ~ min_constriction_chg, family = "binomial",
## data = wi_step.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7903 -1.2229 0.6708 0.9059 1.6723
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.02997 0.33797 -0.089 0.92933
## min_constriction_chg 0.07165 0.02745 2.611 0.00904 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 108.27 on 83 degrees of freedom
## Residual deviance: 100.29 on 82 degrees of freedom
## AIC: 104.29
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_wi_step.scalar <- predict(wi_step.res, pt.wi.scores)
pt.wi.scores.roc4 <- roc(response = pt.wi.scores$smoker,
predictor = pt.wi.scores$pred_wi_step.scalar)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc4
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred_wi_step.scalar)
##
## Data: pt.wi.scores$pred_wi_step.scalar in 29 controls (pt.wi.scores$smoker 0) < 55 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.6765
auc.specific <- data.frame("W/I Diff AUC Scalar Step", pt.wi.scores.roc4$auc)
auc.df <- rbind(auc.df, setNames(auc.specific, names(auc.df)))
plot.roc(pt.wi.scores.roc4, main = "Prediction of Smoker with Scalar Features Backward Stepwise -- W/I Diff")
pt.analytic.df <- pt.df[pt.df$subject_id %in% analytic.N, ]
pt.analytic.df$occasional <- ifelse(pt.analytic.df$user_cat == "occasional", 1, 0)
pt.analytic.df$daily <- ifelse(pt.analytic.df$user_cat == "daily", 1, 0)
pt.analytic.df <- pt.analytic.df[order(pt.analytic.df$subject_id), ]
right_0.post.w <- right_0.post.w[order(right_0.post.w$subject_id), ]
post_pffr.df <- data.frame("subject_id" = pt.analytic.df$subject_id,
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"pct_chg" = I(as.matrix(right_0.post.w[, c(4:404)])))
post_pffr.df$subject_id <- as.factor(post_pffr.df$subject_id)
m.post_pffr <- pffr(pct_chg ~ occ_user + daily_user +s(subject_id, bs="re"),
data = post_pffr.df,
algorithm = "bam",
method = "fREML",
discrete = T)
summary(m.post_pffr)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## pct_chg ~ occ_user + daily_user + s(subject_id, bs = "re")
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.9809 0.5697 -17.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Intercept(yindex) 18.625 19.000 1118.34 <2e-16 ***
## occ_user(yindex) 4.874 4.942 17.87 <2e-16 ***
## daily_user(yindex) 4.293 4.572 11.77 <2e-16 ***
## NA 401.244 411.000 998.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.944 Deviance explained = 94.5%
## fREML score = 71156 Scale est. = 4.2581 n = 32558 (84 x 401)
par(mfrow = c(1, 3))
plot(m.post_pffr)
What does NA in the output mean?
head(right_0.df)
## subject_id tp user_cat frame percent_change
## 831 001-002 post daily 0 0.000000000
## 832 001-002 post daily 1 0.011262403
## 833 001-002 post daily 2 0.002931787
## 834 001-002 post daily 3 -0.026396608
## 835 001-002 post daily 4 -0.078127539
## 836 001-002 post daily 5 -0.153665767
length(unique(right_0.df$subject_id))
## [1] 84
right_0.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
right_0.gam.df$sind <- right_0.gam.df$frame/400
head(right_0.gam.df)
## subject_id user_cat tp frame percent_change demo_age demo_weight
## 1 001-002 daily post 0 0.000000000 34.93 250
## 2 001-002 daily post 1 0.011262403 34.93 250
## 3 001-002 daily post 2 0.002931787 34.93 250
## 4 001-002 daily post 3 -0.026396608 34.93 250
## 5 001-002 daily post 4 -0.078127539 34.93 250
## 6 001-002 daily post 5 -0.153665767 34.93 250
## demo_height demo_gender eye thc.post min_constriction.post duration.post
## 1 71 Male Right 146.7 -15.04075 335
## 2 71 Male Right 146.7 -15.04075 335
## 3 71 Male Right 146.7 -15.04075 335
## 4 71 Male Right 146.7 -15.04075 335
## 5 71 Male Right 146.7 -15.04075 335
## 6 71 Male Right 146.7 -15.04075 335
## avg_end_percent.post slope.post AUC.post thc.pre2 min_constriction.pre2
## 1 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 2 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 3 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 4 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 5 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 6 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## duration.pre2 avg_end_percent.pre2 slope.pre2 AUC.pre2 thc_chg BMI
## 1 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 2 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 3 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 4 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 5 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 6 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## min_constriction_chg AUC_chg duration_chg avg_end_percent_chg slope_chg
## 1 20.67574 12.40813 -105 5.36515 -0.03664918
## 2 20.67574 12.40813 -105 5.36515 -0.03664918
## 3 20.67574 12.40813 -105 5.36515 -0.03664918
## 4 20.67574 12.40813 -105 5.36515 -0.03664918
## 5 20.67574 12.40813 -105 5.36515 -0.03664918
## 6 20.67574 12.40813 -105 5.36515 -0.03664918
## postConsumpTimeToTest occasional daily sind
## 1 59 0 1 0.0000
## 2 59 0 1 0.0025
## 3 59 0 1 0.0050
## 4 59 0 1 0.0075
## 5 59 0 1 0.0100
## 6 59 0 1 0.0125
length(unique(right_0.gam.df$subject_id))
## [1] 84
m.post_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") + s(sind, by=occasional, k=30, bs = "cr") + s(sind, by=daily, k=30, bs = "cr"),
data = right_0.gam.df, method = "REML")
summary(m.post_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.23457 0.07143 -171.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 22.048 25.47 198.52 <2e-16 ***
## s(sind):occasional 9.715 11.82 30.17 <2e-16 ***
## s(sind):daily 9.025 10.98 18.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.244 Deviance explained = 24.5%
## -REML = 1.1227e+05 Scale est. = 57.701 n = 32558
right_0.pt.w <- right_0.pt.w[order(right_0.pt.w$subject_id), ]
wi_pffr.df <- data.frame("subject_id" = pt.analytic.df$subject_id,
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"wi_pct_chg" = I(as.matrix(right_0.pt.w[, c(3:403)])))
wi_pffr.df$subject_id <- as.factor(wi_pffr.df$subject_id)
m.wi_pffr <- pffr(wi_pct_chg ~ occ_user + daily_user + s(subject_id, bs="re"),
data = wi_pffr.df,
algorithm = "bam",
method = "fREML",
discrete = T)
summary(m.wi_pffr)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## wi_pct_chg ~ occ_user + daily_user + s(subject_id, bs = "re")
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5930 0.7301 6.291 3.2e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Intercept(yindex) 16.833 19.000 71.641 < 2e-16 ***
## occ_user(yindex) 1.001 1.002 7.107 0.00769 **
## daily_user(yindex) 1.001 1.002 0.203 0.65261
## NA 402.337 413.000 459.479 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.871 Deviance explained = 87.3%
## fREML score = 86949 Scale est. = 12.604 n = 31934 (84 x 401)
par(mfrow = c(1,3))
plot(m.wi_pffr)
head(right_0.pt)
## subject_id user_cat frame wiPctChg
## 831 001-002 daily 0 0.0000000000
## 832 001-002 daily 1 0.0003702518
## 833 001-002 daily 2 -0.0079714384
## 834 001-002 daily 3 -0.0254202248
## 835 001-002 daily 4 -0.0523712614
## 836 001-002 daily 5 -0.0892197022
length(unique(right_0.pt$subject_id))
## [1] 84
right_0.pt.gam.df <- merge(right_0.pt, pt.analytic.df,
by = c("subject_id", "user_cat"))
right_0.pt.gam.df$sind <- right_0.pt.gam.df$frame/400
head(right_0.pt.gam.df)
## subject_id user_cat frame wiPctChg demo_age demo_weight demo_height
## 1 001-002 daily 0 0.0000000000 34.93 250 71
## 2 001-002 daily 1 0.0003702518 34.93 250 71
## 3 001-002 daily 2 -0.0079714384 34.93 250 71
## 4 001-002 daily 3 -0.0254202248 34.93 250 71
## 5 001-002 daily 4 -0.0523712614 34.93 250 71
## 6 001-002 daily 5 -0.0892197022 34.93 250 71
## demo_gender eye thc.post min_constriction.post duration.post
## 1 Male Right 146.7 -15.04075 335
## 2 Male Right 146.7 -15.04075 335
## 3 Male Right 146.7 -15.04075 335
## 4 Male Right 146.7 -15.04075 335
## 5 Male Right 146.7 -15.04075 335
## 6 Male Right 146.7 -15.04075 335
## avg_end_percent.post slope.post AUC.post thc.pre2 min_constriction.pre2
## 1 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 2 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 3 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 4 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 5 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## 6 -4.432906 0.03958152 -9.542175 22.8 -35.71649
## duration.pre2 avg_end_percent.pre2 slope.pre2 AUC.pre2 thc_chg BMI
## 1 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 2 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 3 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 4 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 5 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## 6 440 -9.798057 0.0762307 -21.95031 123.9 34.86411
## min_constriction_chg AUC_chg duration_chg avg_end_percent_chg slope_chg
## 1 20.67574 12.40813 -105 5.36515 -0.03664918
## 2 20.67574 12.40813 -105 5.36515 -0.03664918
## 3 20.67574 12.40813 -105 5.36515 -0.03664918
## 4 20.67574 12.40813 -105 5.36515 -0.03664918
## 5 20.67574 12.40813 -105 5.36515 -0.03664918
## 6 20.67574 12.40813 -105 5.36515 -0.03664918
## postConsumpTimeToTest occasional daily sind
## 1 59 0 1 0.0000
## 2 59 0 1 0.0025
## 3 59 0 1 0.0050
## 4 59 0 1 0.0075
## 5 59 0 1 0.0100
## 6 59 0 1 0.0125
length(unique(right_0.pt.gam.df$subject_id))
## [1] 84
m.wi_gam <- mgcv::gam(wiPctChg ~ s(sind, k=30, bs="cr") + s(sind, by=occasional, k=30, bs = "cr") + s(sind, by=daily, k=30, bs = "cr"),
data = right_0.pt.gam.df, method = "REML")
summary(m.wi_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## wiPctChg ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.42214 0.08812 50.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 11.13 13.72 54.67 <2e-16 ***
## s(sind):occasional 12.82 15.57 82.93 <2e-16 ***
## s(sind):daily 11.09 13.50 52.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.121 Deviance explained = 12.2%
## -REML = 1.1646e+05 Scale est. = 85.886 n = 31934